/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.discovery;

import java.util.Arrays;

import mosdi.fa.FiniteMemoryTextModel;
import mosdi.util.Alphabet;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.Log;

/** Provides bounds for IUPAC characters derived from a FiniteMemoryTextModel
 *  over the DNA alphabet. */
public class TextModelIupacBounds {
	private static final BitArray[] generalizedAlphabet = Iupac.asGeneralizedAlphabet();
	private static final Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
	
	/** For each of the four classes {{ACGT},{RYSWKM},{BDHV},{N}} the
	 *  minimal possible probability. */
	private double[] charClassMinProbNoFuture;
	/** For each of the four classes {{ACGT},{RYSWKM},{BDHV},{N}} the
	 *  maximal possible probability. */
	private double[] charClassMaxProbNoFuture;
	private double[] charMinProb;
	/** The minimal (wrt all text model states) maximal probability of
	 *  each generalized character. */
	private double[] charMaxProb;
	private double[] charMinProbNoFuture;
	private double[] charMaxProbNoFuture;
	private double[][] maxCondProb;
	/** For each character c, maximum probability over all c_1,c_2,...,c_k
	 *  P(c|c_1)P(c_1|c_2)...P(c_{k-1}|c_k)P(c_k). */
	private double[] telescopeProbUpperBounds;
	
	/** maxCondProbDetailed[g0][g1][gHistory][gFuture] is an upper bound for the probability
	 *  P(S_t=g0 | S_{t-1}=gHistory, S_t=g1, S_{t+1}=gFuture, A), no matter what condition
	 *  A is imposed on positions t'<t-1 or t'>t+1. 
	 *  For gHistory/gFuture= iupacAlphabet.size(), it is assumed that the respective
	 *  position is subject to an unknown contraint. */
	private double[][][][] maxCondProbDetailed;
	
	public TextModelIupacBounds(FiniteMemoryTextModel textModel, boolean detailedBounds) {
		charMinProb = textModel.minProbabilityTable(generalizedAlphabet);
		charMaxProb = textModel.maxProbabilityTable(generalizedAlphabet);
		charMinProbNoFuture = new double[iupacAlphabet.size()];
		charMaxProbNoFuture = new double[iupacAlphabet.size()];
		BitArray gAll = new BitArray(Alphabet.getDnaAlphabet().size());
		gAll.invert();
		for (int g=0; g<iupacAlphabet.size(); ++g) {
			charMinProbNoFuture[g] = textModel.minConditionalProbabilityNoFuture(generalizedAlphabet[g], gAll);
			charMaxProbNoFuture[g] = textModel.maxConditionalProbabilityNoFuture(generalizedAlphabet[g], gAll);
		}
		charClassMinProbNoFuture = new double[4];
		Arrays.fill(charClassMinProbNoFuture, 1.0);
		charClassMaxProbNoFuture = new double[4];
		for (int c=0; c<generalizedAlphabet.length; ++c) {
			int m = Iupac.getCharacterMultiplicity(c)-1;
			charClassMinProbNoFuture[m] = Math.min(charClassMinProbNoFuture[m], charMinProbNoFuture[c]);
			charClassMaxProbNoFuture[m] = Math.max(charClassMaxProbNoFuture[m], charMaxProbNoFuture[c]);
		}
		if (detailedBounds) {
			maxCondProbDetailed = new double[iupacAlphabet.size()][iupacAlphabet.size()][iupacAlphabet.size()+1][iupacAlphabet.size()+1];
			for (int g0=0; g0<iupacAlphabet.size(); ++g0){
				for (int g1=0; g1<iupacAlphabet.size(); ++g1){
					for (int gHistory=0; gHistory<=iupacAlphabet.size(); ++gHistory){
						for (int gFuture=0; gFuture<=iupacAlphabet.size(); ++gFuture){
							BitArray[] bHistory = null;
							BitArray[] bFuture = null;
							if (gHistory<iupacAlphabet.size()) {
								bHistory = new BitArray[1];
								bHistory[0] = generalizedAlphabet[gHistory];
							}
							if (gFuture<iupacAlphabet.size()) {
								bFuture = new BitArray[1];
								bFuture[0] = generalizedAlphabet[gFuture];
							}
							maxCondProbDetailed[g0][g1][gHistory][gFuture] = textModel.maxConditionalProbability(generalizedAlphabet[g0], generalizedAlphabet[g1], bHistory, bFuture);   
						}
					}
				}
			}
		} else {
			maxCondProbDetailed = null;
		}
		maxCondProb = textModel.maxConditionalProbabilityTable(Iupac.asGeneralizedAlphabet());
		telescopeProbUpperBounds = getTelescopeProbUpperBounds();
	}
	
	/** Returns the minimal (w.r.t. all possible histories and futures) probability of
	 *  observing an instance of the generalized character.
	 */
	public double characterMinProbability(int generalizedCharacter) {
		return charMinProb[generalizedCharacter];
	}

	/** Returns the maximal (w.r.t. all possible histories and futures) probability of
	 *  observing an instance of the generalized character.
	 */
	public double characterMaxProbability(int generalizedCharacter) {
		return charMaxProb[generalizedCharacter];
	}

	/** Returns the maximal (w.r.t. all possible histories) probability of
	 *  observing an instance of the generalized character. IMPORTANT:
	 *  In general, this bound only holds when no conditions on the future 
	 *  are imposed.
	 */
	public double characterMaxProbabilityNoFuture(int generalizedCharacter) {
		return charMaxProbNoFuture[generalizedCharacter];
	}

	/** @param history Generalized character one position before current. A value of -1
	 *                 refers to situation where the constraint is unknown.
	 *  @param future  Generalized character one position after current. A value of -1
	 *                 refers to situation where the constraint is unknown.
	 */
	public double characterMaxProbability(int generalizedCharacter, int history, int future) {
		if (maxCondProbDetailed==null) {
			return charMaxProb[generalizedCharacter];
		} else {
			if (history==-1) history = iupacAlphabet.size();
			if (future==-1) future = iupacAlphabet.size();
			return maxCondProbDetailed[generalizedCharacter][iupacAlphabet.getIndex('N')][history][future];
		}
	}

	/** Returns the minimum characterMinProbability(char) over all 
	 *  generalized characters in the given class.
	 */
	public double characterClassMinProbabilityNoFuture(int characterClass) {
		return charClassMinProbNoFuture[characterClass];
	}

	/** Returns the maximum characterMaxProbability(char) over all 
	 *  generalized characters in the given class.
	 */
	public double characterClassMaxProbabilityNoFuture(int characterClass) {
		return charClassMaxProbNoFuture[characterClass];
	}

	/** Returns the maximal (w.r.t. all possible histories and futures) probability
	 *  that an observed character c is instance of generalizedCharacter1
	 *  given that c is an instance of generalizedCharacter2.
	 */
	public double characterMaxConditionalProbability(int generalizedCharacter1, int generalizedCharacter2) {
		return maxCondProb[generalizedCharacter1][generalizedCharacter2];
	}

	/** Returns the maximal (w.r.t. all possible histories and futures) probability
	 *  that an observed character c is instance of generalizedCharacter1
	 *  given that c is an instance of generalizedCharacter2.
	 *  @param history Generalized character one position before current. A value of -1
	 *                 refers to situation where the constraint is unknown.
	 *  @param future  Generalized character one position after current. A value of -1
	 *                 refers to situation where the constraint is unknown.
	 */
	public double characterMaxConditionalProbability(int generalizedCharacter1, int generalizedCharacter2, int history, int future) {
		if (maxCondProbDetailed==null) {
			return maxCondProb[generalizedCharacter1][generalizedCharacter2];
		} else {
			if (history==-1) history = iupacAlphabet.size();
			if (future==-1) future = iupacAlphabet.size();
			return maxCondProbDetailed[generalizedCharacter1][generalizedCharacter2][history][future];
		}
	}

	/** Returns telescope bound as defined in 
	 * Marschall and Rahmann. Proceedings of WABI 2010, pages 337-349.
	 * DOI: 10.1007/978-3-642-15294-8_28
	 */
	public double getTelescopeBound(int generalizedCharacter) {
		return telescopeProbUpperBounds[generalizedCharacter];
	}
	
	/** Computes for each iupac character c, 
	 * max_{c_1,c_2,...,c_k} P(c|c_1)P(c_1|c_2)...P(c_{k-1}|c_k)P(c_k). 
	 * ASSUMES that charMaxProb and condProbUpperBounds have been 
	 * initialized. 
	 */
	private double[] getTelescopeProbUpperBounds() {
		BitArray[] alphabet = Iupac.asGeneralizedAlphabet();
		// character order such that characters with most bits come first
		int[] order = Alphabet.getIupacAlphabet().buildIndexArray("NBDHVRYSWKMACGT");
		double[] results = new double[alphabet.length];
		for (int c : order) {
			double p = charMaxProb[c];
			for (int c2=0; c2<alphabet.length; ++c2) {
				if (c2==c) continue;
				BitArray b = new BitArray(alphabet[c]); 
				b.or(alphabet[c2]);
				if (!b.equals(alphabet[c2])) continue;
				p = Math.max(p, results[c2]*maxCondProb[c][c2]);
			}
			results[c] = p;
		}
		Log.printf(Log.Level.DEBUG, "Telescope bounds: %s\n", Arrays.toString(results));
		return results;
	}

}
